package water.rapids;

import org.junit.Assert;
import org.junit.BeforeClass;
import org.junit.Test;
import water.Key;
import water.TestUtil;
import water.DKV;
import water.fvec.Frame;
import water.fvec.Vec;

/**
 * Exercises reference counting / copy-on-write of Vecs shared between Frames
 * inside a Rapids {@link Session}.
 *
 * <p>The test runs a chain of Rapids expressions, each producing a new temp
 * frame that shares most columns with its predecessor.  After each step it
 * checks (a) that rollups can still be computed on every column of the result,
 * i.e. no shared column was deleted by an {@code rm}, and (b) that the length
 * reported by the frame's {@code Vec.VectorGroup} grew by exactly the expected
 * number of newly made vectors.
 */
public class RefCntTest extends TestUtil {
  // NOTE(review): presumably blocks until the cloud has at least one node - confirm in TestUtil.
  @BeforeClass public static void setup() { stall_till_cloudsize(1); }

  @Test public void testNoTest() { /*defeat junit complaining about no tests in file*/ }

  // Test basic Copy-On-Write optimization is working, by witnessing that the
  // correct (small) number of real vec copies are made, despite many virtual
  // copies being made.
  @Test
  public void testBasic() {
    Session session = new Session();
    Frame crimes = parseTestFile(Key.make("chicagoCrimes10k.hex"),"smalldata/chicago/chicagoCrimes10k.csv.zip");
    Vec.VectorGroup vg = crimes.anyVec().group();

    // Replace the "Date" column of chicagoCrimes10k.hex with (Date + 1); the
    // result is called py_1 and the user-named frame must stay untouched.
    // Exactly 1 new vector is made (result of +).
    int len1 = groupLen(vg);
    Assert.assertTrue("Date must parse as a time column", crimes.vec("Date").isTime());
    Rapids.exec("(tmp= py_1 (:= chicagoCrimes10k.hex (+ (cols_py chicagoCrimes10k.hex \"Date\") 1) 2 []))",session);
    Assert.assertTrue("User named frame must be unchanged", crimes.vec("Date").isTime());
    Frame py_1 = fetchFrame("py_1");
    Assert.assertTrue("py_1 must hold the changed (numeric) column", py_1.vec("Date").isNumeric());
    // Loose sanity bound: msec since epoch is generally >1.3b msec
    Assert.assertTrue("py_1 Date mean is implausibly small", py_1.vec("Date").mean() > 1300000000L);
    int len2 = groupLen(vg);
    Assert.assertEquals("py_1: exactly 1 new vector (result of +)", len1 + 1, len2);

    // Remove original hex key - even though most columns are shared.  Note
    // that this remove is only valid when done in the session context -
    // otherwise the sharing can't be tracked.  Since most columns are shared,
    // the DKV key should be removed, but NOT most data.
    // The local 'crimes' handle is stale from here on and must not be used.
    Rapids.exec("(rm chicagoCrimes10k.hex)",session);
    int lenAfterRm = rollupsThenGroupLen(py_1, vg);
    Assert.assertEquals("rm of original: no new vectors, and VecGroup never rolls backwards", len2, lenAfterRm);

    // Both append, and nuke a dead temp, in one expression.
    // The local 'py_1' handle is stale after this step.
    int len3 = execAndCount(session, vg, "py_2",
        "(, (tmp= py_2 (append py_1 (day (cols_py py_1 \"Date\")) \"Day\")) (rm py_1))");
    Assert.assertEquals("py_2: exactly 1 new vector", len2 + 1, len3);

    // Start a series of computations that append columns
    int len4 = execAndCount(session, vg, "py_3",
        "(tmp= py_3 (append py_2 (month (cols_py py_2 \"Date\")) \"Month\"))");
    Assert.assertEquals("py_3: exactly 1 new vector", len3 + 1, len4);

    // This one does 2 computations ("year" and "+ 1900") to produce 1 column,
    // and writes it with := (an over-write) instead of append.
    int len5 = execAndCount(session, vg, "py_4",
        "(, (rm py_2) (tmp= py_4 (:= py_3 (+ (year (cols_py py_3 \"Date\")) 1900) 17 [])))");
    Assert.assertEquals("py_4: exactly 2 new vectors, for two ops: year and +1900", len4 + 2, len5);

    int len6 = execAndCount(session, vg, "py_5",
        "(, (rm py_3) (tmp= py_5 (append py_4 (week (cols_py py_4 \"Date\")) \"WeekNum\")))");
    Assert.assertEquals("py_5: exactly 1 new vector", len5 + 1, len6);

    int len7 = execAndCount(session, vg, "py_6",
        "(,  (rm py_4) (tmp= py_6 (append py_5 (dayOfWeek (cols_py py_5 \"Date\")) \"WeekDay\")))");
    Assert.assertEquals("py_6: exactly 1 new vector", len6 + 1, len7);

    int len8 = execAndCount(session, vg, "py_7",
        "(, (rm py_5) (tmp= py_7 (append py_6 (hour (cols_py py_6 \"Date\")) \"HourOfDay\")))");
    Assert.assertEquals("py_7: exactly 1 new vector", len7 + 1, len8);

    // A more involved expression; lots of internal temps
    int len9 = execAndCount(session, vg, "py_8",
        "(, (rm py_6) (tmp= py_8 (append py_7 (| (== (cols_py py_7 \"WeekDay\") \"Sun\") (== (cols_py py_7 \"WeekDay\") \"Sat\")) \"Weekend\")))");
    Assert.assertEquals("py_8: exactly 3 new vectors, one for each of {==, ==, |}", len8 + 3, len9);

    // A more involved expression; lots of internal temps
    int len10 = execAndCount(session, vg, "py_9",
        "(, (rm py_7) (tmp= py_9 (append py_8 (cut (cols_py py_8 \"Month\") [0 2 5 7 10 12] [\"Winter\" \"Spring\" \"Summer\" \"Autumn\" \"Winter\"] FALSE TRUE 3) \"Season\")))");
    Assert.assertEquals("py_9: exactly 1 new vector, despite lots of internal vecs", len9 + 1, len10);

    // Drop a column
    int lenAfterDrop = execAndCount(session, vg, "py_10",
        "(, (rm py_8) (tmp= py_10 (cols py_9 -3)))");
    Assert.assertEquals("py_10: dropping a column makes no new vectors", len10, lenAfterDrop);

    // End the session; freeing all resources
    session.end(null);

    // NO FINALLY FRAME DELETES HERE PLEASE...
    // Session ending should clean up; if it does not we need to detect the leak
  }

  /**
   * Reads the VectorGroup's current length straight from the DKV.
   * The group is re-fetched by key on every call so a locally cached copy is
   * never consulted (no caching allowed).
   *
   * @param vg the group whose key is looked up
   * @return {@code len()} of the freshly fetched group
   */
  private static int groupLen(Vec.VectorGroup vg) {
    return DKV.<Vec.VectorGroup>getGet(vg._key).len();
  }

  /**
   * Fetches a frame by name from the DKV, failing the test with a descriptive
   * message (instead of a later NullPointerException) if it is missing.
   *
   * @param name the key name the Rapids expression assigned its result to
   * @return the non-null frame stored under that key
   */
  private static Frame fetchFrame(String name) {
    Frame fr = DKV.getGet(Key.make(name));
    Assert.assertNotNull("Rapids result frame '" + name + "' not found in DKV", fr);
    return fr;
  }

  /**
   * Computes rollups on every column of the frame, then returns the current
   * VectorGroup length.  Calling {@code mean()} on each Vec verifies the
   * columns are still usable; it will crash if some cols are deleted.
   *
   * @param fr frame whose columns are all touched
   * @param vg group whose length is read afterwards
   * @return the group length after the rollups
   */
  private static int rollupsThenGroupLen(Frame fr, Vec.VectorGroup vg) {
    for( Vec vec : fr.vecs() ) vec.mean();
    return groupLen(vg);
  }

  /**
   * Runs one Rapids expression in the session, fetches the frame it produced,
   * verifies all of its columns, and reports the resulting VectorGroup length.
   *
   * @param session   session the expression executes in (tracks column sharing)
   * @param vg        group whose length is read after the step
   * @param resultKey name of the temp frame the expression assigns
   * @param rapids    the Rapids expression to execute
   * @return the group length after the step
   */
  private static int execAndCount(Session session, Vec.VectorGroup vg, String resultKey, String rapids) {
    Rapids.exec(rapids, session);
    return rollupsThenGroupLen(fetchFrame(resultKey), vg);
  }
}
